In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
In [2]:
from sklearn.datasets import fetch_olivetti_faces

# Fetch the Olivetti faces with a fixed shuffle seed so the sample order is
# the same on every run.
dataset = fetch_olivetti_faces(random_state=0, shuffle=True)
# One flattened image per row; plot_images reshapes each row to 64x64.
faces = dataset["data"]
faces.shape
Out[2]:
In [3]:
def plot_images(data, image_shape=(64, 64), n_rows=3, n_cols=4):
    """Draw the leading rows of ``data`` as grayscale images on a grid.

    Parameters
    ----------
    data : sequence of arrays
        ``data[i]`` is one flattened image; it must support ``.reshape`` and
        hold exactly ``image_shape[0] * image_shape[1]`` values.
    image_shape : tuple of int, optional
        (height, width) each row is reshaped to. Defaults to (64, 64).
    n_rows, n_cols : int, optional
        Grid dimensions. At most ``n_rows * n_cols`` images are drawn
        (12 with the defaults).

    Returns
    -------
    None
        Nothing is returned on purpose, so a notebook cell that ends with this
        call does not echo the figure a second time as the cell's output.
    """
    # squeeze=False keeps `axes` a 2-D array even for a 1x1 grid, so .ravel()
    # below always works.
    fig, axes = plt.subplots(n_rows, n_cols, squeeze=False,
                             subplot_kw={'xticks': (), 'yticks': ()})
    n_images = len(data)
    for i, ax in enumerate(axes.ravel()):
        if i < n_images:
            ax.imshow(data[i].reshape(image_shape), cmap="gray")
        else:
            # Fewer images than grid cells: blank the unused panel instead of
            # raising IndexError.
            ax.set_axis_off()
In [4]:
# Show the first 12 face images (plot_images fills a 3x4 grid).
plot_images(faces)
In [5]:
from sklearn.decomposition import RandomizedPCA
pca = RandomizedPCA(n_components=12).fit(faces)
print(pca.components_.shape)
plot_images(pca.components_)
In [6]:
from sklearn.decomposition import FactorAnalysis
fa = FactorAnalysis(n_components=12).fit(faces)
print(fa.components_.shape)
plot_images(fa.components_)
In [7]:
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
fa_pipe = make_pipeline(FactorAnalysis(n_components=12), LogisticRegression())
pca_pipe = make_pipeline(RandomizedPCA(n_components=12), LogisticRegression())
In [8]:
# sklearn.cross_validation was removed in scikit-learn 0.20; cross_val_score
# now lives in sklearn.model_selection (available since 0.18).
from sklearn.model_selection import cross_val_score
# 5-fold cross-validation of each pipeline against the face labels, scored
# with the pipeline's default scorer.
fa_scores = cross_val_score(fa_pipe, faces, dataset.target, cv=5)
pca_scores = cross_val_score(pca_pipe, faces, dataset.target, cv=5)
# Report the mean score across the five folds for each pipeline.
print("Factor analysis scores: %f PCA scores: %f" % (np.mean(fa_scores), np.mean(pca_scores)))
In [ ]: